This R script merges the RS data downloaded from the Google Drive folder shared by Bea with the ReSurvey database.
library(tidyverse)
library(here)
library(lubridate)
library(dtplyr)
library(sf)
library(knitr)
Files downloaded from folder Drive/MOTIVATE-EVEREST/1.VALIDATION/db_Europe/S2/Ene-dic
# Set the folder path
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2/Indices"
# List CSV files (full paths, subfolders included).
# The pattern restricts the listing to names ending in ".csv": without it,
# every file found under the folder, whatever its type, would later be handed
# to read_csv(). The Phenology section of this script applies the same filter.
csv_files <- list.files(folder_path, pattern = "\\.csv$", full.names = TRUE,
                        recursive = TRUE)
# Split the leading token of a file name into a biogeographic region code and
# the unit suffix that follows it.
#
# filename: a file name; only the text before the first underscore of its
#   first element is inspected.
# Returns a list with two elements:
#   biogeo - the region code found at the start of the token, or NA when the
#            token does not start with one of the listed codes
#   unit   - the rest of the token after the region code, or NA when nothing
#            is left or no region code was found
extract_info <- function(filename) {
  region_pattern <- "^(ALP|ARC|ATL|BOR|CON|MED|PANONIA)"
  prefix <- strsplit(filename, "_")[[1]][1]
  region <- str_extract(prefix, region_pattern)
  # Whatever remains once the region code is removed is the unit identifier
  remainder <- str_remove(prefix, region)
  unit <- if (is.na(remainder) || remainder == "") {
    NA_character_
  } else {
    remainder
  }
  list(biogeo = region, unit = unit)
}
# Read each listed file into a tibble and tag it with the region code and
# unit parsed from its file name; data_list holds one tibble per file.
data_list <- lapply(csv_files, function(file) {
  # The region code and unit are taken from the file name only, not the path
  info <- extract_info(basename(file))
  raw <- read_csv(file)
  # EUNIS* and ReSurvey* columns are dropped because they give column type
  # problems when the files are combined
  trimmed <- select(raw, -starts_with("EUNIS"), -starts_with("ReSurvey"))
  mutate(trimmed, biogeo = info$biogeo, unit = info$unit)
})
Rows: 1978 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 205 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 41 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 163 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 5074 Columns: 90
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (47): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 922 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (51): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 95 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 108 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 1417 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 107 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 918 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 125 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 1827 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 130 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 205 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (10): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, RS_CODE, ReSu...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (33): EUNISa_2_d, EUNISa_3, EUNISa_3_d, EUNISa_4, EUNISa_4_d, EUNISb, EUN...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 1188 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the per-file tibbles into a single table
data_RS_S2 <- data_list %>%
  bind_rows()
# Show the combined tibble
data_RS_S2 %>%
  print()
# Number of rows per biogeo/unit combination (print up to 100 of them)
data_RS_S2 %>%
  count(biogeo, unit) %>%
  print(n = 100)
Keep all indices and metrics in case they are useful.
data_RS_S2 <- data_RS_S2 %>%
  # Plot identifier, region/unit tags, year, coordinates and every column of
  # the five index families
  select(
    PlotObserv, biogeo, unit, year, Lat_update, Lon_update,
    starts_with("NDVI"),
    starts_with("NDMI"),
    starts_with("NDWI"),
    starts_with("EVI"),
    starts_with("SAVI")
  ) %>%
  # Coordinates and year get an _RS suffix; they are only kept in case they
  # differ from those in the ReSurvey database due to updates based on
  # Ilona's info
  rename(Lat_RS = Lat_update, Lon_RS = Lon_update, year_RS = year) %>%
  # Record which sensor the values come from
  mutate(source = "S2")
Files downloaded from folder Drive/MOTIVATE-EVEREST/1.VALIDATION/db_Europe/S2/Phenology
# Folder holding the downloaded S2 phenology files
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2/Phenology"
# Full paths of every file whose name ends in ".csv", subfolders included
csv_files <- list.files(
  path = folder_path,
  pattern = "\\.csv$",
  recursive = TRUE,
  full.names = TRUE
)
# Derive the biogeographic region code and the unit suffix from a file name.
#
# filename: a file name; only the part before the first underscore of its
#   first element is used.
# Returns list(biogeo = , unit = ). biogeo is the region code at the start of
# that part (NA if none of the listed codes matches); unit is the text left
# after removing the code (NA if nothing is left or no code matched).
extract_info <- function(filename) {
  token <- strsplit(filename, "_")[[1]][1]
  biogeo <- str_extract(
    token,
    "^(ALP|ANAT|ARC|ATL|BLACK|BOR|CON|MAC|MED|PANONIA|STEPP)"
  )
  unit <- str_remove(token, biogeo)
  # A missing or empty remainder both mean "no unit"
  has_unit <- !is.na(unit) && unit != ""
  if (!has_unit) {
    unit <- NA_character_
  }
  list(biogeo = biogeo, unit = unit)
}
# Build a list with one tibble per phenology file, each tagged with the
# region code and unit taken from the file name.
data_list <- lapply(csv_files, function(file) {
  # Parse the bare file name (directories stripped)
  info <- extract_info(basename(file))
  read_csv(file) %>%
    # Drop the EUNIS* and ReSurvey* columns: they give column type problems
    # when combining data
    select(-c(starts_with("EUNIS"), starts_with("ReSurvey"))) %>%
    mutate(biogeo = info$biogeo, unit = info$unit)
})
Rows: 205 Columns: 161
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, ...
dbl (118): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_diff_max, 0_NDV...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNI...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 41 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 163 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 95 Columns: 161
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, ...
dbl (118): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_diff_max, 0_NDV...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNI...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 108 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 107 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 125 Columns: 161
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, ...
dbl (118): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_diff_max, 0_NDV...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNI...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 205 Columns: 161
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, ...
dbl (118): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_diff_max, 0_NDV...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNI...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (10): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, RS_CODE, ReSu...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (33): EUNISa_2_d, EUNISa_3, EUNISa_3_d, EUNISa_4, EUNISa_4_d, EUNISb, EUN...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8 Columns: 161
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, ...
dbl (118): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_diff_max, 0_NDV...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNI...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 0 Columns: 0
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the per-file phenology tibbles into a single table
data_RS_S2_phen <- data_list %>%
  bind_rows()
# Show the combined tibble
data_RS_S2_phen %>%
  print()
# Number of rows per biogeo/unit combination (print up to 100 of them)
data_RS_S2_phen %>%
  count(biogeo, unit) %>%
  print(n = 100)
data_RS_S2_phen <- data_RS_S2_phen %>%
  # Keep the plot identifier, the region/unit tags and every column whose
  # name contains one of the five index names.
  # Lat, Lon and year are left out on purpose: in case they differ from those
  # in the ReSurvey database due to updates based on Ilona's info, we have
  # Lat_RS, Lon_RS and year_RS from data_RS_S2
  select(
    PlotObserv, biogeo, unit,
    contains("NDVI"), contains("EVI"), contains("SAVI"),
    contains("NDMI"), contains("NDWI")
  ) %>%
  # Strip the leading "<digits>_" prefix from column names
  # (e.g. 0_NDVI_GSD becomes NDVI_GSD)
  rename_with(
    function(col_name) sub("^[0-9]+_", "", col_name),
    .cols = matches("^[0-9]+_")
  ) %>%
  # Record which sensor the values come from
  mutate(source = "S2") %>%
  # Drop the *_diff_max / *_diff_min columns, whose meaning is unclear
  select(-c(
    NDVI_diff_max, NDVI_diff_min,
    NDMI_diff_max, NDMI_diff_min,
    NDWI_diff_max, NDWI_diff_min,
    SAVI_diff_max, SAVI_diff_min,
    EVI_diff_max, EVI_diff_min
  ))
# Set the folder path
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/Landsat/Indices"
# List CSV files (full paths, subfolders included).
# The pattern restricts the listing to names ending in ".csv": without it,
# every file found under the folder, whatever its type, would later be handed
# to read_csv(). The Phenology section of this script applies the same filter.
csv_files <- list.files(folder_path, pattern = "\\.csv$", full.names = TRUE,
                        recursive = TRUE)
# Function to extract biogeo and unit from the filename.
#
# filename: a file name; only the text before the first underscore of its
#   first element is inspected.
# Returns a list with two elements:
#   biogeo - the region code found at the start of that text, or NA when it
#            does not start with one of the listed codes
#   unit   - the text left after removing the region code, or NA when nothing
#            is left or no region code was found
extract_info <- function(filename) {
  first_word <- strsplit(filename, "_")[[1]][1]
  biogeo <- str_extract(first_word, "^(ALP|ARC|ATL|BOR|CON|MED|PANONIA)")
  unit <- str_remove(first_word, biogeo)
  # is.na() has to be tested first: when no region code matches, biogeo is NA
  # and so is unit, and `if (unit == "")` alone would stop with
  # "missing value where TRUE/FALSE needed"
  if (is.na(unit) || unit == "") unit <- NA_character_
  list(biogeo = biogeo, unit = unit)
}
# Turn every listed Landsat file into a tibble carrying the region code and
# unit parsed from its file name; data_list holds one tibble per file.
data_list <- lapply(csv_files, function(file) {
  # Only the file name, without its directories, is parsed
  info <- extract_info(basename(file))
  contents <- read_csv(file)
  # Remove the EUNIS* and ReSurvey* columns, which give column type problems
  # when combining data
  contents <- contents %>%
    select(-starts_with("EUNIS"), -starts_with("ReSurvey"))
  contents %>%
    mutate(biogeo = info[["biogeo"]], unit = info[["unit"]])
})
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 5213 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 423 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 138 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 117 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 213 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 64 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 72 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 9447 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 6432 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 96 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (24): system:index, EUNISa, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (19): EUNISa_1, EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUN...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 99 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 185 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 198 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 20 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (12): system:index, EUNISa, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (31): EUNISa_1, EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2394 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 160 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 2068 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 9807 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (26): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (17): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc_1, EUN...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 308 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning :One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 16171 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2239 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 82 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (13): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (51): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 10 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 422 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 393 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 12 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (10): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, RS_CODE, ReSu...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (33): EUNISa_2_d, EUNISa_3, EUNISa_3_d, EUNISa_4, EUNISa_4_d, EUNISb, EUN...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 9 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 5 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 37 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (13): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (51): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2042 Columns: 93
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90,...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the per-file tibbles into a single table
data_RS_Landsat <- data_list %>%
  bind_rows()
# Show the stacked tibble
print(data_RS_Landsat)
# Number of rows per biogeo and unit (print up to 100 rows)
data_RS_Landsat %>%
  count(biogeo, unit) %>%
  print(n = 100)
Keep all indices and metrics in case they are useful.
data_RS_Landsat <- data_RS_Landsat %>%
  # Keep identifiers plus every index metric. Year and coordinates are renamed
  # with an "_RS" suffix on the way: they are only retained in case they
  # differ from the ReSurvey database after the updates based on Ilona's info.
  select(
    PlotObserv, biogeo, unit,
    year_RS = year, Lat_RS = Lat_update, Lon_RS = Lon_update,
    starts_with("NDVI"), starts_with("NDMI"), starts_with("NDWI"),
    starts_with("EVI"), starts_with("SAVI")
  ) %>%
  # Tag the sensor these values come from
  mutate(source = "Landsat")
# Directory that holds the Landsat phenology exports
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/Landsat/Phenology"
# Full paths of every CSV below that directory, subfolders included
csv_files <- list.files(
  path = folder_path,
  pattern = "\\.csv$",
  recursive = TRUE,
  full.names = TRUE
)
# Split the leading word of a file name into its biogeographic region code
# and the remaining unit name.
#
# filename: a single file name (no directory part).
# Returns list(biogeo, unit). `biogeo` is NA when the name does not start with
# a known region code; `unit` is NA_character_ when nothing is left after the
# code has been removed.
extract_info <- function(filename) {
  # Everything before the first underscore
  prefix <- strsplit(filename, "_")[[1]][1]
  region <- str_extract(
    prefix,
    "^(ALP|ANAT|ARC|ATL|BLACK|BOR|CON|MAC|MED|PANONIA|STEPP)"
  )
  remainder <- str_remove(prefix, region)
  # A missing or empty remainder means the file name carries no unit
  has_unit <- !is.na(remainder) && remainder != ""
  list(
    biogeo = region,
    unit = if (has_unit) remainder else NA_character_
  )
}
# Read every CSV and tag its rows with the region encoded in the file name
data_list <- lapply(csv_files, function(path) {
  # Only the file name (not its directory) carries the region code
  file_info <- extract_info(basename(path))
  read_csv(path) %>%
    # EUNIS* and ReSurvey* columns are guessed with different types from file
    # to file, which breaks the later row-binding, so drop them here
    select(-starts_with("EUNIS"), -starts_with("ReSurvey")) %>%
    mutate(biogeo = file_info$biogeo, unit = file_info$unit)
})
Rows: 138 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 99 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 185 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 198 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 20 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (12): system:index, EUNISa, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (31): EUNISa_1, EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 160 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 308 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 82 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (13): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (73): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 10 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 422 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 12 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (10): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, RS_CODE, ReSu...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (33): EUNISa_2_d, EUNISa_3, EUNISa_3_d, EUNISa_4, EUNISa_4_d, EUNISb, EUN...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 9 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 5 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (72): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 37 Columns: 115
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (13): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, E...
dbl (73): 0_NDVI_GSD, 0_NDVI_browning_doy, 0_NDVI_cv, 0_NDVI_doy_max, 0_NDVI_...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNIS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the per-file tibbles into a single table
data_RS_Landsat_phen <- data_list %>%
  bind_rows()
# Show the stacked tibble
print(data_RS_Landsat_phen)
# Number of rows per biogeo and unit (print up to 100 rows)
data_RS_Landsat_phen %>%
  count(biogeo, unit) %>%
  print(n = 100)
data_RS_Landsat_phen <- data_RS_Landsat_phen %>%
  # Keep identifiers and index metrics only. Lat, Lon and year are dropped on
  # purpose: they may differ from the ReSurvey database after the updates
  # based on Ilona's info, and Lat_RS, Lon_RS and year_RS are already
  # available from data_RS_S2.
  select(
    PlotObserv, biogeo, unit,
    contains("NDVI"), contains("EVI"), contains("SAVI"),
    contains("NDMI"), contains("NDWI")
  ) %>%
  # Strip the leading "<digits>_" prefix from the exported column names
  rename_with(~ str_remove(.x, "^[0-9]+_"), .cols = matches("^[0-9]+_")) %>%
  # Tag the sensor these values come from
  mutate(source = "Landsat")
# Canopy height sampled at the survey points
data_RS_CH <- read_csv(
  file = "C:/Data/MOTIVATE/MOTIVATE_RS_data/Canopy_Height_1m/Europe_points_CanopyHeight_1m.csv"
)
Rows: 425310 Columns: 8
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (6): Lat_update, Lon_update, canopy_height, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data_RS_CH
# Only the observation key and the canopy height value are needed downstream
data_RS_CH <- select(data_RS_CH, obs_unique, canopy_height)
In this file, there is the correspondence obs_unique - PlotObservationID.
# Lookup table linking obs_unique_id to PlotObservationID
db_Europa <- here("..", "DB_first_check", "data", "clean", "db_Europa_20250107.csv") %>%
  read_csv()
Rows: 425310 Columns: 12
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Country, RS_CODE, ReSurvey site, ReSurvey plot, Expert System, Locat...
dbl (6): PlotObservationID, Lon_updated, Lat_updated, plot_unique_id, year, o...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
This is the ReSurvey database after updates (to be continued).
# Read the updated ReSurvey database (tab-separated despite the .csv name).
# All EUNIS columns are forced to character; every other column is guessed.
db_resurv <- local({
  # EUNISa .. EUNISd
  eunis_codes <- paste0("EUNIS", c("a", "b", "c", "d"))
  # EUNISa_1 .. EUNISd_4
  eunis_levels <- as.vector(outer(eunis_codes, paste0("_", 1:4), paste0))
  eunis_columns <- c(
    eunis_codes,
    eunis_levels,
    paste0(eunis_levels, "_descr"),
    "EUNIS_assignation"
  )
  # One col_character() collector per EUNIS column, named after the column
  character_spec <- setNames(
    rep(list(col_character()), length(eunis_columns)),
    eunis_columns
  )
  read_tsv(
    here("..", "DB_first_check", "data", "clean", "db_resurv_updated_clean.csv"),
    col_types = do.call(cols, c(list(.default = col_guess()), character_spec))
  )
})
No parsing issues!
Get sample of ReSurvey database:
# ReSurvey observations (selected columns) with obs_unique_id attached
db_Europa_sample <- db_resurv %>%
  select(
    PlotObservationID, RS_CODE, `ReSurvey site`, `ReSurvey plot`,
    Lon_updated, Lat_updated, year, date,
    starts_with("EUNIS"), `Location method`
  ) %>%
  select(-EUNIS_assignation) %>%
  # Natural join: matches on every column name the two tables share
  left_join(
    db_Europa %>%
      select(PlotObservationID, Lon_updated, Lat_updated, year, obs_unique_id)
  )
Joining with `by = join_by(PlotObservationID, Lon_updated, Lat_updated, year)`
# Print without truncating columns
db_Europa_sample %>%
  print(width = Inf)
Add column PLOT to data to identify unique plots:
# Give every resurveyed plot a numeric identifier, stored in column PLOT.
db_Europa_sample <- db_Europa_sample %>%
# The original names contain spaces and caused problems when used for
# grouping, so copy them into syntactic helper columns first
mutate(RS_site = `ReSurvey site`, RS_plot = `ReSurvey plot`) %>%
# Hand the table to data.table (through dtplyr) for faster processing
lazy_dt() %>%
# RS_CODE + site + plot are the 3 variables that uniquely identify a plot
group_by(RS_CODE, RS_site, RS_plot) %>%
# .GRP is data.table's group counter, so all rows of one plot share a number
mutate(PLOT = .GRP) %>%
# Materialise the lazy result as a tibble again
as_tibble() %>%
# The helper columns were only needed for grouping
select(-RS_site, -RS_plot)
Keep only habitats T, R, S and Q:
# Restrict to observations whose level-1 EUNIS code is T, R, S or Q
db_Europa_allobs <- dplyr::filter(
  db_Europa_sample,
  EUNISa_1 %in% c("T", "R", "S", "Q")
)
Save as csv for me to use in GEE:
# NOTE(review): the file name ends in "allobst" while the object is "allobs";
# confirm the spelling is intended before anything else relies on it
db_Europa_allobs %>%
  write_csv(file = "data/clean/db_Europa_allobst.csv")
Save as shp to merge with bioregions:
# Build point geometries from the updated coordinates (EPSG:4326, WGS84)
db_Europa_allobs_sf <- db_Europa_allobs %>%
  st_as_sf(coords = c("Lon_updated", "Lat_updated"), crs = 4326)
# Export as a shapefile
db_Europa_allobs_sf %>%
  st_write("C:/GIS/MOTIVATE/shapefiles/db_Europa_allobs_sf.shp")
Warning :Field names abbreviated for ESRI Shapefile driver
Writing layer `db_Europa_allobs_sf' to data source
`C:/GIS/MOTIVATE/shapefiles/db_Europa_allobs_sf.shp' using driver `ESRI Shapefile'
Writing 188477 features with 45 fields and geometry type Point.
Warning :GDAL Message 1: One or several characters couldn't be converted correctly from UTF-8 to ISO-8859-1. This warning will not be emitted anymore.
And, for each PLOT, keep only the last resurvey:
db_Europa_sample_latest <- db_Europa_sample %>%
  # Same habitat restriction as for db_Europa_allobs
  dplyr::filter(EUNISa_1 %in% c("T", "R", "S", "Q")) %>%
  # Within each plot keep the row(s) carrying the most recent date.
  # NOTE(review): if any date within a plot is NA, max(date) is NA and every
  # row of that plot is dropped; confirm dates are complete.
  dplyr::filter(date == max(date), .by = PLOT)
Save as csv for Bea:
db_Europa_sample_latest %>%
  write_csv(file = "data/clean/db_Europa_sample_latest.csv")
Save as shp to merge with bioregions:
# # Convert to sf object
# db_Europa_sample_latest_sf <- st_as_sf(db_Europa_sample_latest,
# coords = c("Lon_updated", "Lat_updated"),
# crs = 4326) # WGS84
# st_write(db_Europa_sample_latest_sf,
# "C:/GIS/MOTIVATE/shapefiles/db_Europa_sample_latest_sf.shp")
Get only the columns PlotObservationID (original unique identifier) and obs_unique_id (unique identifier created by me).
# Reduce the lookup to the two identifier columns
db_Europa_sample_latest <- select(
  db_Europa_sample_latest,
  PlotObservationID, obs_unique_id
)
# Keep every S2 row and attach obs_unique_id where the lookup has a match
data_RS_S2_ID <- right_join(
  db_Europa_sample_latest,
  # The RS export calls the key PlotObserv; rename it so the join can use it
  rename(data_RS_S2, PlotObservationID = PlotObserv)
)
Joining with `by = join_by(PlotObservationID)`
Now we have PlotObservationID in data_RS_S2_ID.
# Keep every S2 phenology row and attach obs_unique_id where available
data_RS_S2_phen_ID <- right_join(
  db_Europa_sample_latest,
  # The RS export calls the key PlotObserv; rename it so the join can use it
  rename(data_RS_S2_phen, PlotObservationID = PlotObserv)
)
Joining with `by = join_by(PlotObservationID)`
Now we have PlotObservationID in data_RS_S2_phen_ID
# Keep every Landsat row and attach obs_unique_id where available
data_RS_Landsat_ID <- right_join(
  db_Europa_sample_latest,
  # The RS export calls the key PlotObserv; rename it so the join can use it
  rename(data_RS_Landsat, PlotObservationID = PlotObserv)
)
Joining with `by = join_by(PlotObservationID)`
Now we have PlotObservationID in data_RS_Landsat_ID.
# Keep every Landsat phenology row and attach obs_unique_id where available
data_RS_Landsat_phen_ID <- right_join(
  db_Europa_sample_latest,
  # The RS export calls the key PlotObserv; rename it so the join can use it
  rename(data_RS_Landsat_phen, PlotObservationID = PlotObserv)
)
Joining with `by = join_by(PlotObservationID)`
Now we have PlotObservationID in data_RS_Landsat_phen_ID
# Keep every canopy-height row and attach PlotObservationID from db_Europa
data_RS_CH_ID <- right_join(
  select(db_Europa, PlotObservationID, obs_unique_id),
  # The export calls the key obs_unique; rename it so the join can use it
  rename(data_RS_CH, obs_unique_id = obs_unique)
)
Joining with `by = join_by(obs_unique_id)`
Now we have PlotObservationID in data_RS_CH_ID.
For some points, there is data both from S2 and Landsat. In those cases, the idea was to use the S2 data because it is more precise (10 m vs 30 m). But we will keep both.
# Append the sensor name to every index column so that S2 and Landsat values
# can live side by side, and drop the now redundant `source` column.
data_RS_S2_ID <- data_RS_S2_ID %>%
  rename_with(
    ~ paste0(.x, "_S2"),
    starts_with(c("NDVI", "NDMI", "NDWI", "EVI", "SAVI"))
  ) %>%
  select(-source)
data_RS_Landsat_ID <- data_RS_Landsat_ID %>%
  rename_with(
    ~ paste0(.x, "_Landsat"),
    starts_with(c("NDVI", "NDMI", "NDWI", "EVI", "SAVI"))
  ) %>%
  select(-source)
# Phenology tables: same suffixing, plus mark mean and std as belonging to the
# vegetative period.
data_RS_S2_phen_ID <- data_RS_S2_phen_ID %>%
  rename_with(
    ~ paste0(.x, "_S2"),
    starts_with(c("NDVI", "NDMI", "NDWI", "EVI", "SAVI"))
  ) %>%
  select(-source) %>%
  rename_with(~ gsub("_mean", "_mean_veg", .x), .cols = contains("_mean")) %>%
  rename_with(~ gsub("_std", "_std_veg", .x), .cols = contains("_std"))
data_RS_Landsat_phen_ID <- data_RS_Landsat_phen_ID %>%
  rename_with(
    ~ paste0(.x, "_Landsat"),
    starts_with(c("NDVI", "NDMI", "NDWI", "EVI", "SAVI"))
  ) %>%
  select(-source) %>%
  rename_with(~ gsub("_mean", "_mean_veg", .x), .cols = contains("_mean")) %>%
  rename_with(~ gsub("_std", "_std_veg", .x), .cols = contains("_std"))
Join S2, S2_phen and Landsat data:
# Columns that identify one plot observation in every RS table
rs_key_cols <- c("PlotObservationID", "obs_unique_id", "biogeo", "unit")

# Full join of two RS tables on the identifier columns only.
#
# A natural join would also key on every other column the tables share
# (year_RS, Lat_RS, Lon_RS, the *_max_S2 metrics). Those are measurements, so
# any mismatch between sources would split one observation into several rows.
# Here the shared non-key columns are coalesced instead: the value from `x` is
# kept and gaps are filled from `y`.
#
# x, y: tibbles containing all columns named in `by`.
# by:   character vector of key column names.
# Returns the joined tibble with the columns of `x` followed by the columns
# that only exist in `y`.
full_join_by_keys <- function(x, y, by = rs_key_cols) {
  shared <- setdiff(intersect(names(x), names(y)), by)
  joined <- full_join(x, y, by = by, suffix = c("", ".rhs"))
  for (col in shared) {
    joined[[col]] <- coalesce(joined[[col]], joined[[paste0(col, ".rhs")]])
  }
  select(joined, -any_of(paste0(shared, ".rhs")))
}

data_RS <- data_RS_S2_ID %>%
  full_join_by_keys(data_RS_Landsat_ID) %>%
  full_join_by_keys(data_RS_S2_phen_ID) %>%
  full_join_by_keys(data_RS_Landsat_phen_ID)
Joining with `by = join_by(PlotObservationID, obs_unique_id, biogeo, unit, year_RS, Lat_RS,
Lon_RS)`
Joining with `by = join_by(PlotObservationID, obs_unique_id, biogeo, unit, NDVI_max_S2,
NDMI_max_S2, NDWI_max_S2, EVI_max_S2, SAVI_max_S2)`
Joining with `by = join_by(PlotObservationID, obs_unique_id, biogeo, unit)`
Number of observations with NDVI_max data from both S2 and Landsat:
data_RS %>%
  filter(!is.na(NDVI_max_S2), !is.na(NDVI_max_Landsat)) %>%
  nrow()
[1] 14518
Difference between NDVI_max values from S2 and Landsat:
# Absolute S2 vs Landsat difference in NDVI_max, one panel per biogeo-unit
data_RS %>%
  filter(!is.na(NDVI_max_S2), !is.na(NDVI_max_Landsat)) %>%
  mutate(diff_NDVI_max = abs(NDVI_max_S2 - NDVI_max_Landsat)) %>%
  ggplot(aes(x = diff_NDVI_max, fill = paste(biogeo, unit, sep = "-"))) +
  geom_histogram(color = "black") +
  facet_wrap(~ paste(biogeo, unit, sep = "-")) +
  theme(legend.position = "none")
# Same comparison for NDMI_max
data_RS %>%
  filter(!is.na(NDMI_max_S2), !is.na(NDMI_max_Landsat)) %>%
  mutate(diff_NDMI_max = abs(NDMI_max_S2 - NDMI_max_Landsat)) %>%
  ggplot(aes(x = diff_NDMI_max, fill = paste(biogeo, unit, sep = "-"))) +
  geom_histogram(color = "black") +
  facet_wrap(~ paste(biogeo, unit, sep = "-")) +
  theme(legend.position = "none")
There is a large difference between NDVI values from S2 and Landsat. For now, use the S2 data; this is still being checked with Bea / Jose.
When values are available from both satellites, use S2:
# For every "<index>_<metric>_S2" column add a sensor-independent column
# "<index>_<metric>" that holds the S2 value when present and otherwise the
# value of the matching "<index>_<metric>_Landsat" column (NA if both are
# missing). The sensor-specific columns are kept.
data_RS <- data_RS %>%
  mutate(across(
    matches("^(NDVI|NDMI|NDWI|EVI|SAVI)_.+_S2$"),
    # get() looks up the Landsat twin of the column currently being processed;
    # it errors if that twin does not exist
    ~ coalesce(.x, get(sub("_S2$", "_Landsat", cur_column()))),
    .names = "{col}_combined"
  )) %>%
  # "<name>_S2_combined" -> "<name>"
  rename_with(~ sub("_S2_combined$", "", .x), matches("_S2_combined$"))
Get number of points per biogeo and unit:
# Rows of data_RS per biogeo and unit, stored in column npoints_R
npoints_bioregion_R <- data_RS %>%
  count(biogeo, unit, name = "npoints_R")
Read number of points per biogeo and unit from GIS:
npoints_bioregion_GIS <- read_delim(
  "data/clean/Npoints_bioregion.csv",
  delim = ";"
) %>%
  mutate(
    # The GIS layer abbreviates the Pannonian region as "PAN"
    biogeo = ifelse(BIOGEO == "PAN", "PANONIA", BIOGEO),
    # UNIT embeds the region code; strip it, and use NA for PANONIA so the
    # keys line up with those derived from the RS file names
    unit = ifelse(biogeo == "PANONIA", NA, str_remove(UNIT, BIOGEO)),
    npoints_GIS = Join_Count
  ) %>%
  select(biogeo, unit, npoints_GIS)
Rows: 42 Columns: 54
── Column specification ───────────────────────────────────────────────────────────────────────────
Delimiter: ";"
chr (44): name, BIOGEO, UNIT, RS_CODE, RSrvyst, RSrvypl, date, EUNISa, EUNISb, EUNISc, EUNISd, ...
dbl (5): OID_, Join_Count, TARGET_FID, code, PLOT
num (5): AREA, PltObID, year, Shape_Length, Shape_Area
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Merge both and see differences in n points:
# Keep regions present in either source so that gaps show up as NA
npoints_bioregion_merged <- npoints_bioregion_R %>%
  full_join(npoints_bioregion_GIS)
Joining with `by = join_by(biogeo, unit)`
Nice table to email to Bea:
npoints_bioregion_merged %>%
  arrange(biogeo, unit) %>%
  # print() shows the tibble and passes it on unchanged to kable()
  print(n = 100) %>%
  kable()
| biogeo | unit | npoints_R | npoints_GIS |
|---|---|---|---|
| ALP | ALPS | 5214 | 5360 |
| ALP | ATRA | 628 | 423 |
| ALP | BALKAN | 138 | 138 |
| ALP | DINARIC | NA | 0 |
| ALP | ENNINE | 51 | 54 |
| ALP | NORDIC | 158 | 117 |
| ALP | PYR | 376 | 213 |
| ALP | ROMANIAN | 64 | 64 |
| ANA | TURKEY | NA | 0 |
| ARC | ICELAND | NA | 0 |
| ARC | NORWAY | 72 | 106 |
| ATL | BENELUX | 9447 | 9530 |
| ATL | BRITAIN | 6432 | 6761 |
| ATL | FRANCE | 96 | 97 |
| ATL | IBERIA | 194 | 99 |
| ATL | IRELAND | NA | 0 |
| ATL | NORDIC | NA | 0 |
| BLS | BLACKSEA | NA | 0 |
| BOR | BALTIC | 293 | 185 |
| BOR | FINLAND | 198 | 240 |
| BOR | NORDIC | 20 | 20 |
| CON | AUSTRIA | 2394 | 2439 |
| CON | BALKAN | 267 | 160 |
| CON | FRANCE | 2068 | 2091 |
| CON | GERMANY | 9807 | 9924 |
| CON | ITALICA | 433 | 312 |
| CON | NORDIC | 16171 | 16172 |
| CON | POLAND | 2239 | 2241 |
| CON | ROMANIA | 82 | 82 |
| MAC | ARONESIA | NA | 0 |
| MED | BALEAR | NA | 0 |
| MED | BALKAN | NA | 0 |
| MED | FRANCE | NA | 0 |
| MED | GREECE | 10 | 10 |
| MED | IBERIA | 422 | 422 |
| MED | ITALICA | 598 | 395 |
| MED | PORT | 20 | 12 |
| MED | SICILIA | 17 | 9 |
| MED | TIRRENO | 5 | 5 |
| MED | TURKEY | 37 | 37 |
| PANONIA | NA | 2042 | 2046 |
| STE | PPIC | NA | 0 |
# Attach the RS indices and canopy height to the ReSurvey database and flag,
# per observation, which kinds of RS data are available.
db_resurv_RS <- db_resurv %>%
  left_join(select(data_RS, -obs_unique_id)) %>%
  left_join(select(data_RS_CH_ID, -obs_unique_id)) %>%
  mutate(
    # A flag is TRUE only when both NDVI and NDMI are present
    S2_data = !(is.na(NDVI_max_S2) | is.na(NDMI_max_S2)),
    S2_phen_data = !(is.na(NDVI_GSD_S2) | is.na(NDMI_GSD_S2)),
    RS_data = !(is.na(NDVI_max) | is.na(NDMI_max)),
    RS_phen_data = !(is.na(NDVI_GSD) | is.na(NDMI_GSD)),
    CH_data = !is.na(canopy_height)
  )
Joining with `by = join_by(PlotObservationID)`
Joining with `by = join_by(PlotObservationID)`
# The _S2 and _Landsat columns are kept in db_resurv_RS.
# Number of observations with / without each kind of RS data:
count(db_resurv_RS, S2_data)
count(db_resurv_RS, S2_phen_data)
count(db_resurv_RS, RS_data)
count(db_resurv_RS, RS_phen_data)
count(db_resurv_RS, CH_data)
Save clean file for analyses (to be updated continuously due to updates in ReSurvey database and updates on RS data).
# The file is tab-separated even though its name ends in .csv
db_resurv_RS %>%
  write_tsv(here("data", "clean", "db_resurv_RS_20250617.csv"))
sessionInfo()
R version 4.5.0 (2025-04-11 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)
Matrix products: default
LAPACK version 3.12.1
locale:
[1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8 LC_MONETARY=Spanish_Spain.utf8
[4] LC_NUMERIC=C LC_TIME=Spanish_Spain.utf8
time zone: Europe/Madrid
tzcode source: internal
attached base packages:
[1] stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] knitr_1.50 pROC_1.18.5 randomForest_4.7-1.2 moreparty_0.4
[5] caret_7.0-1 lattice_0.22-6 ggparty_1.0.0 partykit_1.2-24
[9] libcoin_1.0-10 party_1.3-18 strucchange_1.5-4 sandwich_3.1-1
[13] zoo_1.8-14 modeltools_0.2-24 mvtnorm_1.3-3 ggeffects_2.2.1
[17] car_3.1-3 carData_3.0-5 lmerTest_3.1-3 lme4_1.1-37
[21] Matrix_1.7-3 dtplyr_1.3.1 rnaturalearth_1.0.1 sf_1.0-20
[25] scales_1.4.0 readxl_1.4.5 gridExtra_2.3 here_1.0.1
[29] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[33] purrr_1.0.4 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
[37] ggplot2_3.5.2 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0 magrittr_2.0.3
[5] TH.data_1.1-3 farver_2.1.2 nloptr_2.2.1 rmarkdown_2.29
[9] vctrs_0.6.5 minqa_1.2.8 terra_1.8-50 htmltools_0.5.8.1
[13] varImp_0.4 cellranger_1.1.0 Formula_1.2-5 sass_0.4.10
[17] parallelly_1.44.0 bslib_0.9.0 KernSmooth_2.23-26 htmlwidgets_1.6.4
[21] plyr_1.8.9 cachem_1.1.0 mime_0.13 lifecycle_1.0.4
[25] iterators_1.0.14 pkgconfig_2.0.3 R6_2.6.1 fastmap_1.2.0
[29] shiny_1.10.0 rbibutils_2.3 future_1.49.0 digest_0.6.37
[33] numDeriv_2016.8-1.1 rprojroot_2.0.4 labeling_0.4.3 timechange_0.3.0
[37] httr_1.4.7 abind_1.4-8 compiler_4.5.0 proxy_0.4-27
[41] bit64_4.6.0-1 withr_3.0.2 backports_1.5.0 DBI_1.2.3
[45] MASS_7.3-65 lava_1.8.1 classInt_0.4-11 ModelMetrics_1.2.2.2
[49] tools_4.5.0 units_0.8-7 httpuv_1.6.16 future.apply_1.11.3
[53] nnet_7.3-20 glue_1.8.0 promises_1.3.2 nlme_3.1-168
[57] inum_1.0-5 checkmate_2.3.2 reshape2_1.4.4 generics_0.1.4
[61] recipes_1.3.0 gtable_0.3.6 tzdb_0.5.0 class_7.3-23
[65] data.table_1.17.2 hms_1.1.3 utf8_1.2.5 coin_1.4-3
[69] foreach_1.5.2 pillar_1.10.2 vroom_1.6.5 later_1.4.2
[73] splines_4.5.0 bit_4.6.0 survival_3.8-3 tidyselect_1.2.1
[77] reformulas_0.4.1 xfun_0.52 measures_0.3 hardhat_1.4.1
[81] timeDate_4041.110 matrixStats_1.5.0 DT_0.33 phosphoricons_0.2.1
[85] stringi_1.8.7 yaml_2.3.10 boot_1.3-31 shinyWidgets_0.9.0
[89] evaluate_1.0.3 codetools_0.2-20 cli_3.6.5 rpart_4.1.24
[93] xtable_1.8-4 Rdpack_2.6.4 jquerylib_0.1.4 Rcpp_1.0.14
[97] globals_0.18.0 parallel_4.5.0 rclipboard_0.2.1 gower_1.0.2
[101] listenv_0.9.1 ipred_0.9-15 prodlim_2025.04.28 e1071_1.7-16
[105] crayon_1.5.3 insight_1.3.0 rlang_1.1.6 multcomp_1.4-28